This file includes the code for computing the regression results regarding the predictors of the evolution of self-related health during the FinDonor study for the
We load the data and assign groups as they were assigned in the first part of the study.
#Load questionnaire data
load(file = "../data/final_data.rdata")
final_data <- data
#load measurement data
load(file = "../data/quest_2_donor_data.rdata")
FD_activity_biomarker_data <- data %>%
rename(Ferritin_beginning = Ferritin)
first_questionnaire_data <- final_data %>% filter(questionnaire == "First")
second_questionnaire_data <- final_data %>% filter(questionnaire == "Second")
rm(data)
We remove donors with very high ferritin (/Ferritin > 400) and high CRP (> 30)
We compute for each donor their wight at first donation and then the difference in reported BMI between the first questionnaire and the last questionnaire.
We then build a categorical variable for smoking status. This variable will need to be dummy coded into 3 variables if it is used as a regression variable.
We will apply this coding as it was applied in a recent epidemiological study in the Japanese population. Importantly we will consider only changes from smoker to non-smoker, and not changes fron “daily” to “occasional” smoker, as was done in the previously cited paper(NOTE: they had much more precise quantification of smoking behavior (see questionnaire ) and a much larger N and still used a binary smoking/non-smoking catergorisation). However, in the modelling stage we find that smoking can not be used in the models.
We also add age to the dataframe.
The data object FD_activity_biomarker_data includes all necessary variables for implenting the model.
FD_activity_biomarker_data <-
final_data %>%
group_by(donor) %>%
# Get staring weight
filter(questionnaire =="First") %>%
rename(starting_weight = weight) %>%
dplyr::select(donor, starting_weight) %>%
full_join(final_data, by = "donor") %>%
# Get difference in BMI
dplyr::select(donor, starting_weight, BMI, questionnaire) %>%
spread(key = questionnaire, value = BMI ) %>%
mutate(BMI_diff = Second - First) %>%
dplyr::select(-First, -Second) %>%
full_join(final_data, by = "donor") %>%
# GEt smoking status evolution
dplyr::select(donor, starting_weight, BMI_diff, smoking, questionnaire) %>%
spread(key = questionnaire, value = smoking) %>%
mutate(smoking_behavior = case_when(First == "no" & Second == "no" ~"never smoker",
First == "no" & Second %in% c("daily", "sometimes") ~ "new smoker",
First %in% c("daily", "sometimes") & Second %in% c("daily", "sometimes") ~ "always smoker",
First %in% c("daily", "sometimes") & Second == "no" ~"Former smoker",
TRUE ~"NA")) %>%
dplyr::select(-First, -Second) %>%
full_join(final_data, by = "donor") %>%
dplyr::select(donor, starting_weight, BMI_diff, health, smoking_behavior, questionnaire) %>%
# Get health status evolution
spread(key = questionnaire, value = health) %>%
mutate(health_evolution = case_when(First < Second ~ "Improved",
First == Second ~ "Stable",
Second < First ~ "Worsened",
TRUE ~"NA")) %>%
dplyr::select(-First, -Second) %>%
# Add number of months between donation events
full_join(final_data, by = "donor") %>%
dplyr::select(donor, starting_weight, BMI_diff, health_evolution, smoking_behavior, date, questionnaire,group) %>%
spread(key = questionnaire, value = date) %>%
mutate(interval_between_quest = interval(First, Second) %>%
as.duration() %>%
as.numeric( "months") %>%
round(0)) %>%
dplyr::select(-First, -Second) %>%
# Add donor age
left_join(first_questionnaire_data %>%
dplyr::select(donor, age),
by = "donor") %>%
filter( group != "Women_pre_menop_no_mens") %>%
# Exckude donors with no measure of health evolution
left_join(FD_activity_biomarker_data, by = "donor") %>%
filter(health_evolution != "NA") %>%
drop_na(starting_weight,BMI_diff ) %>%
filter(Ferritin_beginning <= 400 & CRP < 30)
final_data <-
final_data %>%
filter(donor %in% FD_activity_biomarker_data$donor)
We first count how many donors we have in each group.
final_data %>%
filter(questionnaire == "First") %>%
count(group)
## # A tibble: 3 x 2
## group n
## <fct> <int>
## 1 Men 589
## 2 Post_menopause_women 353
## 3 Pre_menopause_women 474
We select the variabels that will be included in the models.
We also run some trasnformations to make the OR more interpretable:
regression_data <-
FD_activity_biomarker_data %>%
inner_join(first_questionnaire_data %>%
dplyr::select(donor, health),
by = "donor") %>%
mutate(health_very_good = ifelse(health == "Very_good",1,0),
health_excellent = ifelse(health == "Excellent", 1, 0),
health_poor = ifelse(health == "Poor", 1, 0),
health_satisfactory = ifelse(health == "Satisfactory", 1, 0)
) %>%
mutate(health_outcome = ifelse(health_evolution == "Worsened", 1,0)) %>%
mutate(health_outcome_neg = ifelse(health_evolution == "Improved", 1,0)) %>%
mutate(donation_frequency = nb_donations_between_questionnaires/(interval_between_quest/12) ) %>%
mutate(log_Ferritin_beginning = log(Ferritin_beginning)/log(2),
starting_weight_o =starting_weight,
starting_weight = starting_weight/5,
age_o = age,
age = age/5,
Hb_beginning_o = Hb_v,
Hb_beginning = Hb_v/5) %>%
dplyr::select(donor, starting_weight,BMI_diff, age, group, log_Ferritin_beginning, Hb_beginning,
donation_frequency,health_poor,health_satisfactory, health_very_good,health_excellent,health_outcome,health_outcome_neg, smoking_behavior,Ferritin_beginning,age_o,starting_weight_o,Hb_beginning_o )
regression_data$smoking_behavior <- factor(regression_data$smoking_behavior)
regression_data <- droplevels(regression_data)
file <- "../results/hypregdata_unscaled.rdata"
save(regression_data,file=file)
regression_data %>% group_by(group) %>%
count()
## # A tibble: 3 x 2
## # Groups: group [3]
## group n
## <fct> <int>
## 1 Men 589
## 2 Post_menopause_women 353
## 3 Pre_menopause_women 474
# myVars <- c( "age_o", "starting_weight_o" ,"BMI_diff", "Hb_beginning_o", "Ferritin_beginning",
# "donation_frequency","initial_health","final_health")
myVars <- c( "Age" ,
"Initial weight (kg)" ,
"BMI difference (second - first)" ,
"Initial hemoglobin (g/l)" ,
"Initial ferritin (ug/l)" ,
"Donation frequency (yearly)" ,
"Inital health rating",
"Final health rating")
non_normal_vars <- c("Initial ferritin (ug/l)")
health1<- first_questionnaire_data %>% dplyr::select(donor,health) %>% rename(initial_health=health)
health2<- second_questionnaire_data %>% dplyr::select(donor,health) %>% rename(final_health=health)
table1data <- regression_data %>% left_join(health1) %>% left_join(health2)
table1data <- table1data %>% dplyr::select(group,age_o,starting_weight_o,BMI_diff,Hb_beginning_o,Ferritin_beginning,donation_frequency,initial_health,final_health) %>%
rename(
"Age" = age_o,
"Initial weight (kg)" = starting_weight_o ,
"BMI difference (second - first)" = BMI_diff,
"Initial hemoglobin (g/l)" = Hb_beginning_o,
"Initial ferritin (ug/l)" = Ferritin_beginning,
"Donation frequency (yearly)" =donation_frequency,
"Inital health rating"=initial_health,
"Final health rating"= final_health
)
summary_table <- CreateTableOne(data = table1data,vars=myVars, strata = "group",test = FALSE)
tab3Mat <- print(summary_table, nonnormal = non_normal_vars,vars=myVars, quote = FALSE, noSpaces = TRUE, printToggle = FALSE)
tab3Mat %>%
kable()
| Men | Post_menopause_women | Pre_menopause_women | |
|---|---|---|---|
| n | 589 | 353 | 474 |
| Age (mean (SD)) | 47.65 (13.20) | 58.58 (5.57) | 35.51 (10.04) |
| Initial weight (kg) (mean (SD)) | 85.23 (14.38) | 71.09 (12.86) | 70.74 (14.06) |
| BMI difference (second - first) (mean (SD)) | 0.23 (1.39) | 0.28 (1.53) | 0.90 (1.91) |
| Initial hemoglobin (g/l) (mean (SD)) | 150.38 (9.64) | 138.07 (8.03) | 135.28 (7.80) |
| Initial ferritin (ug/l) (median [IQR]) | 41.00 [25.00, 68.00] | 33.00 [21.00, 51.00] | 25.00 [16.00, 41.00] |
| Donation frequency (yearly) (mean (SD)) | 3.00 (1.40) | 2.41 (0.94) | 1.92 (0.96) |
| Inital health rating (%) | |||
| Poor | 0 (0.0) | 0 (0.0) | 0 (0.0) |
| Satisfactory | 15 (2.5) | 6 (1.7) | 9 (1.9) |
| Good | 175 (29.7) | 142 (40.2) | 156 (32.9) |
| Very_good | 294 (49.9) | 146 (41.4) | 236 (49.8) |
| Excellent | 105 (17.8) | 59 (16.7) | 73 (15.4) |
| Final health rating (%) | |||
| Poor | 2 (0.3) | 1 (0.3) | 2 (0.4) |
| Satisfactory | 36 (6.1) | 30 (8.5) | 28 (5.9) |
| Good | 213 (36.2) | 136 (38.5) | 195 (41.1) |
| Very_good | 256 (43.5) | 149 (42.2) | 192 (40.5) |
| Excellent | 82 (13.9) | 37 (10.5) | 57 (12.0) |
write.csv(tab3Mat, file = paste0("../results/table_1_population_",gsub("-","",as.Date(Sys.time())),".csv"))
regression_data_pre_menop_women <-
regression_data %>%
filter(group == "Pre_menopause_women") %>%
dplyr::select(donor, age, starting_weight, BMI_diff,
log_Ferritin_beginning, Hb_beginning, donation_frequency) %>%
gather(key = key, value = value, -donor) %>%
group_by(key) %>%
mutate(value_scale = scale(value, scale = FALSE)[,1]) %>%
dplyr::select(-value) %>%
spread(key = key, value = value_scale) %>%
inner_join(regression_data %>%
dplyr::select(donor,health_poor,health_satisfactory, health_very_good, health_excellent, health_outcome,health_outcome_neg,smoking_behavior),
by = "donor")
logit_pre_menop_women <- glm(health_outcome ~ age + starting_weight + BMI_diff +
log_Ferritin_beginning + Hb_beginning +
donation_frequency + health_very_good + health_excellent,
data=regression_data_pre_menop_women,
family="binomial")
summary(logit_pre_menop_women)
##
## Call:
## glm(formula = health_outcome ~ age + starting_weight + BMI_diff +
## log_Ferritin_beginning + Hb_beginning + donation_frequency +
## health_very_good + health_excellent, family = "binomial",
## data = regression_data_pre_menop_women)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6542 -0.9426 -0.4128 1.0571 2.5083
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.37444 0.27418 -8.660 < 2e-16 ***
## age 0.05216 0.05786 0.902 0.3673
## starting_weight 0.12963 0.04129 3.139 0.0017 **
## BMI_diff 0.11202 0.05750 1.948 0.0514 .
## log_Ferritin_beginning -0.03595 0.11678 -0.308 0.7582
## Hb_beginning 0.00524 0.07461 0.070 0.9440
## donation_frequency -0.14188 0.11567 -1.227 0.2200
## health_very_good 2.05066 0.30789 6.660 2.73e-11 ***
## health_excellent 2.92329 0.37726 7.749 9.28e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 604.79 on 473 degrees of freedom
## Residual deviance: 509.53 on 465 degrees of freedom
## AIC: 527.53
##
## Number of Fisher Scoring iterations: 5
plot(logit_pre_menop_women)
vif(logit_pre_menop_women)
## age starting_weight BMI_diff
## 1.160649 1.187453 1.047510
## log_Ferritin_beginning Hb_beginning donation_frequency
## 1.212132 1.135600 1.043167
## health_very_good health_excellent
## 1.900105 1.941321
tidy_logit_pre_menop_women <-
logit_pre_menop_women %>%
tidy %>%
mutate(OR = exp(estimate),
group = "Pre_menopause_women")
regression_data_post_menop_women <-
regression_data %>%
filter(group == "Post_menopause_women") %>%
dplyr::select(donor, age, starting_weight, BMI_diff,
log_Ferritin_beginning,Hb_beginning,donation_frequency) %>%
gather(key = key, value = value, -donor) %>%
group_by(key) %>%
mutate(value_scale = scale(value, scale = FALSE)[,1]) %>%
dplyr::select(-value) %>%
spread(key = key, value = value_scale) %>%
inner_join(regression_data %>%
dplyr::select(donor, health_poor,health_satisfactory,health_very_good, health_excellent, health_outcome,health_outcome_neg,smoking_behavior),
by = "donor")
logit_post_menop_women <- glm(health_outcome ~ age + starting_weight + BMI_diff +
log_Ferritin_beginning + Hb_beginning + donation_frequency +
health_very_good + health_excellent,
data=regression_data_post_menop_women,
family="binomial")
summary(logit_post_menop_women)
##
## Call:
## glm(formula = health_outcome ~ age + starting_weight + BMI_diff +
## log_Ferritin_beginning + Hb_beginning + donation_frequency +
## health_very_good + health_excellent, family = "binomial",
## data = regression_data_post_menop_women)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6885 -0.7666 -0.5016 0.8962 2.3779
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.128759 0.268687 -7.923 2.32e-15 ***
## age 0.001707 0.122117 0.014 0.98885
## starting_weight 0.132685 0.055185 2.404 0.01620 *
## BMI_diff 0.261035 0.089226 2.926 0.00344 **
## log_Ferritin_beginning 0.001927 0.152094 0.013 0.98989
## Hb_beginning -0.009034 0.088292 -0.102 0.91851
## donation_frequency -0.387220 0.144547 -2.679 0.00739 **
## health_very_good 1.331872 0.325173 4.096 4.21e-05 ***
## health_excellent 2.557690 0.407650 6.274 3.51e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 417.03 on 352 degrees of freedom
## Residual deviance: 357.36 on 344 degrees of freedom
## AIC: 375.36
##
## Number of Fisher Scoring iterations: 4
tidy_logit_post_menop_women <-
logit_post_menop_women %>%
tidy %>%
mutate(OR = exp(estimate),
group = "Post_menopause_women")
We check that there is no problematic collinearity betwen regressors (VIF < 5)
vif(logit_post_menop_women)
## age starting_weight BMI_diff
## 1.108134 1.157356 1.139544
## log_Ferritin_beginning Hb_beginning donation_frequency
## 1.130102 1.159486 1.059071
## health_very_good health_excellent
## 1.557766 1.729163
plot(logit_post_menop_women)
regression_data_men <-
regression_data %>%
filter(group == "Men") %>%
dplyr::select(donor, age, starting_weight, BMI_diff,
log_Ferritin_beginning, Hb_beginning,
donation_frequency) %>%
gather(key = key, value = value, -donor) %>%
group_by(key) %>%
mutate(value_scale = scale(value, scale = FALSE)[,1]) %>%
dplyr::select(-value) %>%
spread(key = key, value = value_scale) %>%
inner_join(regression_data %>%
dplyr::select(donor, health_poor,health_satisfactory, health_very_good, health_excellent,health_outcome,health_outcome_neg,smoking_behavior),
by = "donor")
logit_men <- glm(health_outcome ~ age + starting_weight + BMI_diff +
log_Ferritin_beginning + Hb_beginning + donation_frequency +
health_very_good + health_excellent,
data=regression_data_men,
family="binomial")
summary(logit_men)
##
## Call:
## glm(formula = health_outcome ~ age + starting_weight + BMI_diff +
## log_Ferritin_beginning + Hb_beginning + donation_frequency +
## health_very_good + health_excellent, family = "binomial",
## data = regression_data_men)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5230 -0.8189 -0.4643 0.9546 2.5087
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.53503 0.26804 -9.458 < 2e-16 ***
## age 0.02350 0.04026 0.584 0.55941
## starting_weight 0.17939 0.03660 4.902 9.48e-07 ***
## BMI_diff 0.10428 0.07410 1.407 0.15933
## log_Ferritin_beginning -0.28867 0.11635 -2.481 0.01310 *
## Hb_beginning 0.07268 0.05482 1.326 0.18495
## donation_frequency -0.20859 0.07955 -2.622 0.00874 **
## health_very_good 1.79474 0.29514 6.081 1.19e-09 ***
## health_excellent 2.72150 0.34306 7.933 2.14e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 706.07 on 588 degrees of freedom
## Residual deviance: 598.39 on 580 degrees of freedom
## AIC: 616.39
##
## Number of Fisher Scoring iterations: 5
library(epiDisplay)
logistic.display(logit_men)
##
## Logistic regression predicting health_outcome
##
## crude OR(95%CI) adj. OR(95%CI)
## age (cont. var.) 0.97 (0.9,1.03) 1.02 (0.95,1.11)
##
## starting_weight (cont. var.) 1.13 (1.06,1.2) 1.2 (1.11,1.29)
##
## BMI_diff (cont. var.) 1.09 (0.96,1.24) 1.11 (0.96,1.28)
##
## log_Ferritin_beginning (cont. var.) 0.93 (0.78,1.12) 0.75 (0.6,0.94)
##
## Hb_beginning (cont. var.) 1.06 (0.97,1.16) 1.08 (0.97,1.2)
##
## donation_frequency (cont. var.) 0.91 (0.8,1.04) 0.81 (0.69,0.95)
##
## health_very_good: 1 vs 0 1.58 (1.1,2.26) 6.02 (3.37,10.73)
##
## health_excellent: 1 vs 0 3.4 (2.2,5.26) 15.2 (7.76,29.78)
##
## P(Wald's test) P(LR-test)
## age (cont. var.) 0.559 0.559
##
## starting_weight (cont. var.) < 0.001 < 0.001
##
## BMI_diff (cont. var.) 0.159 0.158
##
## log_Ferritin_beginning (cont. var.) 0.013 0.012
##
## Hb_beginning (cont. var.) 0.185 0.184
##
## donation_frequency (cont. var.) 0.009 0.008
##
## health_very_good: 1 vs 0 < 0.001 < 0.001
##
## health_excellent: 1 vs 0 < 0.001 < 0.001
##
## Log-likelihood = -299.1957
## No. of observations = 589
## AIC value = 616.3914
tidy_logit_men <-
logit_men %>%
tidy %>%
mutate(OR = exp(estimate),
group = "Men")
plot(logit_men)
vif(logit_men)
## age starting_weight BMI_diff
## 1.102262 1.121451 1.021828
## log_Ferritin_beginning Hb_beginning donation_frequency
## 1.246534 1.140851 1.187989
## health_very_good health_excellent
## 2.075282 2.175625
reg_data<- list(men=regression_data_men,womenpost=regression_data_post_menop_women,womenpre=regression_data_pre_menop_women)
file <- "../results/hypregdata.rdata"
save(reg_data,file=file)
# https://www.painblogr.org/2017-10-18-purrring-through-bootstraps.html
get_coefficients <- function(data, boot_ind){
fit <- glm (health_outcome~.,
data[boot_ind,],
family = "binomial")
return(coef(fit))
}
compute_Bca_CI <-function(fit_boot){
Bca_inf = rep(0, length(fit_boot$t0))
Bca_sup = rep(0, length(fit_boot$t0))
for (i_regressor in 1:length(fit_boot$t0)){
CI <- boot.ci(fit_boot, type = "bca", index=i_regressor)
Bca_inf[i_regressor] <- CI$bca[4]
Bca_sup[i_regressor] <- CI$bca[5]
}
return(tibble(Bca_inf,Bca_sup, regressor = names(fit_boot$t0)) %>%
filter(regressor != "(Intercept)"))
}
get_bootstrap <- function(data, nb_boot){
fit_boot <- boot(data, statistic= get_coefficients, R = nb_boot)
fit_boot_distrib <- as.tibble(fit_boot$t)
names(fit_boot_distrib) <- names(fit_boot$t0)
fit_boot_Bca <- compute_Bca_CI(fit_boot)
return(list(fit_boot_distrib,fit_boot_Bca))
}
get_bootstrap_coeffs <- function(regression_data, current_group, nb_boot )
{
## preprocess and standardize data
test_data_std <- regression_data %>%
filter(group == current_group) %>%
dplyr::select(donor, age, starting_weight, BMI_diff,
log_Ferritin_beginning, Hb_beginning,
donation_frequency) %>%
gather(key = key, value = value, -donor) %>%
group_by(key) %>%
mutate(value_scale = scale(value, scale = FALSE)[,1]) %>%
dplyr::select(-value) %>%
spread(key = key, value = value_scale) %>%
inner_join(regression_data %>%
dplyr::select(donor, health_outcome, health_very_good, health_excellent),
by = "donor") %>%
dplyr::select(-donor)
result <- test_data_std %>%
get_bootstrap(nb_boot = nb_boot)
bootstrap_distrib <- result[[1]] %>%
gather(key = regressor, value = coefficient) %>%
filter(regressor != "(Intercept)") %>%
mutate(group = current_group)
Bca_CI <- result[[2]] %>% mutate(group = current_group)
return(list(bootstrap_distrib,Bca_CI))
}
output_file_distrib = ("../results/bootstraps/coeff_boot_distrib_seed_125_ctr_strata_final.rdata")
output_file_Bca = ("../results/bootstraps/coeff_boot_Bca_seed_125_ctr_strata_final.rdata")
#get_from_file_ferr = TRUE
get_from_file_ferr <- all(file.exists(output_file_distrib),file.exists(output_file_Bca) )
set.seed(125)
group_str = "Pre_menopause_women"
if (get_from_file_ferr){
load(file=output_file_distrib)
load(file=output_file_Bca)
}else
{
for(group_str in c("Pre_menopause_women", "Post_menopause_women","Men")){
stuff <- get_bootstrap_coeffs(regression_data, group_str, nb_boot = 10000)
if(!exists("bootstrap_distrib")){
bootstrap_distrib <- stuff[[1]]
bootstrap_Bca_CI <- stuff[[2]]
}else
{
bootstrap_distrib<-bind_rows(bootstrap_distrib, stuff[[1]])
bootstrap_Bca_CI<-bind_rows(bootstrap_Bca_CI, stuff[[2]])
}
}
save(bootstrap_distrib,file=output_file_distrib)
save(bootstrap_Bca_CI,file=output_file_Bca)
}
# "\U0394 BMI"
regressor_values <- c(
"age" = "Age",
"BMI_diff" = "BMI difference",
"donation_frequency" = "Donation frequency",
"Hb_beginning" = "Initial Hemoglobin",
"log_Ferritin_beginning" ="Initial Ferritin",
"starting_weight" ="Initial weight",
"health_very_good" ="Initial health rating \n Very good vs Satisfactory/Good",
"health_excellent" = "Initial health rating \n Excellent vs Satisfactory/Good"
)
bootstrap_Bca_CI <-
tidy_logit_men %>%
bind_rows(tidy_logit_post_menop_women) %>%
bind_rows(tidy_logit_pre_menop_women) %>%
dplyr::select(group, term, OR, p.value) %>%
rename(regressor = term) %>%
full_join(bootstrap_Bca_CI,
by = c("regressor", "group")) %>%
filter(regressor != "(Intercept)") %>%
mutate(Bca_inf = exp(Bca_inf),
Bca_sup = exp(Bca_sup),
is_sig = p.value < 0.05) %>%
mutate(regressor = plyr::revalue(regressor, regressor_values),
regressor = ordered(regressor,
levels = c("Age", "Initial weight", "BMI difference", "Initial Hemoglobin",
"Initial Ferritin","Donation frequency",
"Initial health rating \n Very good vs Satisfactory/Good",
"Initial health rating \n Excellent vs Satisfactory/Good")),
regressor = fct_rev(regressor),
group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal women",
group == "Post_menopause_women" ~ "Post-menopausal women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Pre-menopausal women", "Post-menopausal women", "Men" ) ))
file <- "../results/hyp_bootstrap_Bca_CI.csv"
temp <- bootstrap_Bca_CI %>% mutate_if(is.numeric, round, digits = 3) %>% dplyr::select(regressor,group,p.value,OR,Bca_inf,Bca_sup) %>% arrange(desc(regressor),group)
write.csv(temp ,file = file,row.names = FALSE)
# "\U0394 BMI"
bootstrap_distrib %>%
mutate(regressor = plyr::revalue(regressor, regressor_values),
regressor = ordered(regressor,
levels = c("Age", "Initial weight", "BMI difference", "Initial Hemoglobin",
"Initial Ferritin","Donation frequency",
"Initial health rating \n Very good vs Satisfactory/Good",
"Initial health rating \n Excellent vs Satisfactory/Good")),
regressor = fct_rev(regressor),
group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal women",
group == "Post_menopause_women" ~ "Post-menopausal women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Men", "Post-menopausal women", "Pre-menopausal women" ) )) %>%
mutate(OR = exp(coefficient)) %>%
ggplot(aes(x = regressor, y = OR, color = group, fill = group)) +
geom_hline(yintercept = 1 , color = "grey", linetype = "dotted",
size = 1) +
# geom_violin(alpha = 0.15, trim = TRUE, position = position_dodge(width = 0.8), size = 0.1) +
geom_pointrange(data = bootstrap_Bca_CI,
aes(ymin=Bca_inf, ymax = Bca_sup, shape = is_sig, color = group, fill = group),
position = position_dodge(width = 0.7), size = 1) +
# geom_point(data = bootstrap_Bca_CI,
# aes(shape = is_sig, fill = group),
# position = position_dodge(width = 0.8), size = 2) +
scale_color_manual(values = c( "#0065BD", "#BA0303", "#C50084" ),
limits = c("Men", "Post-menopausal women","Pre-menopausal women" )) +
scale_fill_manual(values = c( "#0065BD", "#BA0303", "#C50084" ),
limits = c("Men", "Post-menopausal women","Pre-menopausal women" )) +
scale_shape_manual(values = c(1,16)) +
scale_y_log10(limits=c(1/40,40)) +
# scale_x_discrete(labels=parse(text=unique(bootstrap_Bca_CI$regressor)))
# ylim(1/35, 35) +
guides(shape = FALSE) +
theme_classic() +
# theme(legend.position = c(0.8,0.9),
# legend.title = element_blank(),
# legend.box = "vertical",
# legend.text = element_text(size = 20),
# plot.title = element_text(size = 24,
# hjust = 0),
# panel.grid.minor.y = element_blank(),
# axis.line = element_line(colour="black"),
# axis.title.y = element_blank(),
# axis.title.x = element_text(size = 24),
# axis.text = element_text(size = 22),
# panel.grid.major.y = element_blank()) +
coord_flip() +
xlab("Robust standardized coefficient") +
labs(title = NULL,
subtitle = NULL) +
theme(axis.title = element_text(size = 14),
strip.text = element_text(size = 14),
legend.text = element_text(size = 12),
strip.background = element_rect(fill = "white"),
strip.background.x = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14, angle = 30,hjust = 0.8),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 16,
hjust = 0.5))
#
ggplot2::ggsave(filename = "../results/figures/hyp_log_reg_results_b.pdf",
width = 35,
height = 18,
dpi = 600,
units = "cm")
fig.width=15
fig.height=20
phbd <- final_data %>%
dplyr::select(donor, group, health, questionnaire) %>%
drop_na(group) %>%
filter( group != "Women_pre_menop_no_mens") %>%
spread(key = questionnaire, value = health) %>%
filter(First != "NA" & Second != "NA") %>%
mutate(Difference = case_when(First < Second ~ "Improved",
First == Second ~ "Stable",
TRUE ~ "Worsened")) %>%
mutate(Difference = ordered(Difference,levels=c("Worsened","Stable","Improved"))) %>%
inner_join(FD_activity_biomarker_data, by = c("donor", "group")) %>%
mutate(group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal \n women",
group == "Post_menopause_women" ~ "Post-menopausal \n women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Men", "Post-menopausal \n women", "Pre-menopausal \n women") ))
phb <- ggplot(phbd,aes(x = Difference, y = Hb_v, color = group)) +
# geom_hline(yintercept = 15, color = "red", linetype = 3) +
geom_boxplot(outlier.alpha = 0) +
geom_beeswarm(alpha = 0.5, shape = 1) +
# scale_y_log10() +
facet_grid(group ~.,scales="free_y") +
scale_color_manual(values = c( "#ff85a2", "#de2d26","#00BFFF" ),
limits = c("Pre-menopausal \n women", "Post-menopausal \n women","Men" )) +
theme_bw() +
theme(legend.position = "none") +
xlab("Evolution of self-rated health") +
ylab("Initial hemoglobin (g/l)")
ggplot2::ggsave(plot=phb,filename = "../results/figures/hyp_hb.pdf",
width = fig.width,
height = fig.height,
dpi = 600,
units = "cm")
phb
phbd %>% group_by(group, Difference) %>% summarise(mean=mean(Hb_v),median=median(Hb_v))
## # A tibble: 9 x 4
## # Groups: group [3]
## group Difference mean median
## <ord> <ord> <dbl> <dbl>
## 1 Men Worsened 151. 149
## 2 Men Stable 150. 149
## 3 Men Improved 150. 150
## 4 "Post-menopausal \n women" Worsened 138. 137
## 5 "Post-menopausal \n women" Stable 138. 139
## 6 "Post-menopausal \n women" Improved 136. 138
## 7 "Pre-menopausal \n women" Worsened 136. 135
## 8 "Pre-menopausal \n women" Stable 135. 135
## 9 "Pre-menopausal \n women" Improved 135. 135
phbd %>% group_by(group) %>% summarise(mean=mean(Hb_v),median=median(Hb_v))
## # A tibble: 3 x 3
## group mean median
## <ord> <dbl> <dbl>
## 1 Men 150. 149
## 2 "Post-menopausal \n women" 138. 138
## 3 "Pre-menopausal \n women" 135. 135
pferd <- final_data %>%
dplyr::select(donor, group, health, questionnaire) %>%
drop_na(group) %>%
filter( group != "Women_pre_menop_no_mens") %>%
spread(key = questionnaire, value = health) %>%
filter(First != "NA" & Second != "NA") %>%
mutate(Difference = case_when(First < Second ~ "Improved",
First == Second ~ "Stable",
TRUE ~ "Worsened")) %>%
mutate(Difference = ordered(Difference,levels=c("Worsened","Stable","Improved"))) %>%
inner_join(FD_activity_biomarker_data, by = c("donor", "group")) %>%
mutate(group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal \n women",
group == "Post_menopause_women" ~ "Post-menopausal \n women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Men", "Post-menopausal \n women", "Pre-menopausal \n women") ))
pfer <- ggplot(pferd, aes(x = Difference, y = Ferritin_beginning, color = group)) +
# geom_hline(yintercept = 15, color = "red", linetype = 3) +
geom_boxplot(outlier.alpha = 0) +
geom_beeswarm(alpha = 0.5, shape = 1) +
scale_y_log10() +
facet_grid(group ~.,scales="free_y") +
#facet_grid(group ~.) +
scale_color_manual(values = c( "#ff85a2", "#de2d26","#00BFFF" ),
limits = c("Pre-menopausal \n women", "Post-menopausal \n women","Men" )) +
theme_bw() +
theme(legend.position = "none") +
xlab("Evolution of self-rated health") +
ylab("Initial ferritin (ug/l)")
#scale_y_log10(limits=c(10,100))
ggplot2::ggsave(plot=pfer,filename = "../results/figures/hyp_ferritin.pdf",
width = fig.width,
height = fig.height,
dpi = 600,
units = "cm")
pfer
pferd %>% group_by(group, Difference) %>% summarise(mean=mean(Ferritin_beginning),median=median(Ferritin_beginning),geoMean=exp(mean(log(Ferritin_beginning))))
## # A tibble: 9 x 5
## # Groups: group [3]
## group Difference mean median geoMean
## <ord> <ord> <dbl> <dbl> <dbl>
## 1 Men Worsened 50.6 39 40.1
## 2 Men Stable 52.2 42 42.1
## 3 Men Improved 55.9 42.5 42.1
## 4 "Post-menopausal \n women" Worsened 42.1 33 34.5
## 5 "Post-menopausal \n women" Stable 40.7 33 33.0
## 6 "Post-menopausal \n women" Improved 40 33 32.4
## 7 "Pre-menopausal \n women" Worsened 34.9 26 26.6
## 8 "Pre-menopausal \n women" Stable 32.8 25 25.3
## 9 "Pre-menopausal \n women" Improved 30.0 26 25.1
pferd %>% group_by(group) %>% summarise(mean=mean(Ferritin_beginning),median=median(Ferritin_beginning),geoMean=exp(mean(log(Ferritin_beginning))))
## # A tibble: 3 x 4
## group mean median geoMean
## <ord> <dbl> <dbl> <dbl>
## 1 Men 52.2 41 41.5
## 2 "Post-menopausal \n women" 41.0 33 33.3
## 3 "Pre-menopausal \n women" 33.1 25 25.7
pbmd <- final_data %>%
dplyr::select(donor, group, health, questionnaire) %>%
drop_na(group) %>%
filter( group != "Women_pre_menop_no_mens") %>%
spread(key = questionnaire, value = health) %>%
filter(First != "NA" & Second != "NA") %>%
mutate(Difference = case_when(First < Second ~ "Improved",
First == Second ~ "Stable",
TRUE ~ "Worsened")) %>%
mutate(Difference = ordered(Difference,levels=c("Worsened","Stable","Improved"))) %>%
inner_join(FD_activity_biomarker_data, by = c("donor", "group")) %>%
mutate(group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal \n women",
group == "Post_menopause_women" ~ "Post-menopausal \n women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Men", "Post-menopausal \n women", "Pre-menopausal \n women") ))
pbm <- ggplot(pbmd, aes(x = Difference, y = BMI_diff, color = group)) +
# geom_hline(yintercept = 15, color = "red", linetype = 3) +
geom_boxplot(outlier.alpha = 0) +
geom_beeswarm(alpha = 0.5, shape = 1) +
#scale_y_log10() +
facet_grid(group ~.,scales="free_y") +
scale_color_manual(values = c( "#ff85a2", "#de2d26","#00BFFF" ),
limits = c("Pre-menopausal \n women", "Post-menopausal \n women","Men" )) +
theme_bw() +
theme(legend.position = "none") +
xlab("Evolution of self-rated health") +
ylab("BMI difference")
#ylim(-5,5)
ggplot2::ggsave(plot=pbm,filename = "../results/figures/hyp_bmi_diff.pdf",
width = fig.width,
height = fig.height,
dpi = 600,
units = "cm")
pbm
pbmd %>% group_by(group, Difference) %>% summarise(mean=mean(BMI_diff),median=median(BMI_diff))
## # A tibble: 9 x 4
## # Groups: group [3]
## group Difference mean median
## <ord> <ord> <dbl> <dbl>
## 1 Men Worsened 0.355 0.191
## 2 Men Stable 0.221 0
## 3 Men Improved 0.0310 0
## 4 "Post-menopausal \n women" Worsened 0.499 0.363
## 5 "Post-menopausal \n women" Stable 0.107 0
## 6 "Post-menopausal \n women" Improved 0.591 0.367
## 7 "Pre-menopausal \n women" Worsened 1.14 0.968
## 8 "Pre-menopausal \n women" Stable 0.706 0.363
## 9 "Pre-menopausal \n women" Improved 0.978 0.602
pbmd %>% group_by(group) %>% summarise(mean=mean(BMI_diff),median=median(BMI_diff))
## # A tibble: 3 x 3
## group mean median
## <ord> <dbl> <dbl>
## 1 Men 0.233 0
## 2 "Post-menopausal \n women" 0.275 0.191
## 3 "Pre-menopausal \n women" 0.896 0.635
pfqd <- final_data %>%
dplyr::select(donor, group, health, questionnaire) %>%
drop_na(group) %>%
filter( group != "Women_pre_menop_no_mens") %>%
spread(key = questionnaire, value = health) %>%
filter(First != "NA" & Second != "NA") %>%
mutate(Difference = case_when(First < Second ~ "Improved",
First == Second ~ "Stable",
TRUE ~ "Worsened")) %>%
mutate(Difference = ordered(Difference,levels=c("Worsened","Stable","Improved"))) %>%
inner_join(FD_activity_biomarker_data, by = c("donor", "group")) %>%
mutate(donation_frequency = nb_donations_between_questionnaires/(interval_between_quest/12) ) %>%
mutate(group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal \n women",
group == "Post_menopause_women" ~ "Post-menopausal \n women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Men", "Post-menopausal \n women", "Pre-menopausal \n women") ))
pfq <- ggplot(pfqd, aes(x = Difference, y = donation_frequency, color = group)) +
# geom_hline(yintercept = 15, color = "red", linetype = 3) +
geom_boxplot(outlier.alpha = 0) +
geom_beeswarm(alpha = 0.5, shape = 1) +
#scale_y_log10() +
facet_grid(group ~.,scales="free_y") +
scale_color_manual(values = c( "#ff85a2", "#de2d26","#00BFFF" ),
limits = c("Pre-menopausal \n women", "Post-menopausal \n women","Men" )) +
theme_bw() +
theme(legend.position = "none") +
xlab("Evolution of self-rated health") +
ylab("Doonation frequency (yearly)")
ggplot2::ggsave(plot=pfq,filename = "../results/figures/hyp_n_donations.pdf",
width = fig.width,
height = fig.height,
dpi = 600,
units = "cm")
pfq
pfqd %>% group_by(group, Difference) %>% summarise(mean=mean(donation_frequency),median=median(donation_frequency))
## # A tibble: 9 x 4
## # Groups: group [3]
## group Difference mean median
## <ord> <ord> <dbl> <dbl>
## 1 Men Worsened 2.86 2.61
## 2 Men Stable 3.12 3.11
## 3 Men Improved 2.76 2.55
## 4 "Post-menopausal \n women" Worsened 2.22 2.07
## 5 "Post-menopausal \n women" Stable 2.46 2.5
## 6 "Post-menopausal \n women" Improved 2.60 2.61
## 7 "Pre-menopausal \n women" Worsened 1.86 1.85
## 8 "Pre-menopausal \n women" Stable 1.91 1.88
## 9 "Pre-menopausal \n women" Improved 2.05 2.18
pfqd %>% group_by(group) %>% summarise(mean=mean(donation_frequency),median=median(donation_frequency))
## # A tibble: 3 x 3
## group mean median
## <ord> <dbl> <dbl>
## 1 Men 3.00 2.91
## 2 "Post-menopausal \n women" 2.41 2.48
## 3 "Pre-menopausal \n women" 1.92 1.89
pwed <- final_data %>%
dplyr::select(donor, group, health, questionnaire) %>%
drop_na(group) %>%
filter( group != "Women_pre_menop_no_mens") %>%
spread(key = questionnaire, value = health) %>%
filter(First != "NA" & Second != "NA") %>%
mutate(Difference = case_when(First < Second ~ "Improved",
First == Second ~ "Stable",
TRUE ~ "Worsened")) %>%
mutate(Difference = ordered(Difference,levels=c("Worsened","Stable","Improved"))) %>%
inner_join(FD_activity_biomarker_data, by = c("donor", "group")) %>%
mutate(group = case_when(group == "Pre_menopause_women" ~ "Pre-menopausal \n women",
group == "Post_menopause_women" ~ "Post-menopausal \n women",
TRUE ~ "Men"),
group = ordered(group, levels = c( "Men", "Post-menopausal \n women", "Pre-menopausal \n women") ))
pwe <- ggplot(pwed, aes(x = Difference, y = starting_weight, color = group)) +
# geom_hline(yintercept = 15, color = "red", linetype = 3) +
geom_boxplot(outlier.alpha = 0) +
geom_beeswarm(alpha = 0.5, shape = 1) +
scale_y_log10() +
facet_grid(group ~.,scales="free_y") +
scale_color_manual(values = c( "#ff85a2", "#de2d26","#00BFFF" ),
limits = c("Pre-menopausal \n women", "Post-menopausal \n women","Men" )) +
theme_bw() +
theme(legend.position = "none") +
xlab("Evolution of self-rated health") +
ylab("Initial weight (kg)")
ggplot2::ggsave(plot=pwe,filename = "../results/figures/hyp_weight.pdf",
width = fig.width,
height = fig.height,
dpi = 600,
units = "cm")
pwe
pwed %>% group_by(group, Difference) %>% summarise(mean=mean(starting_weight),median=median(starting_weight))
## # A tibble: 9 x 4
## # Groups: group [3]
## group Difference mean median
## <ord> <ord> <dbl> <dbl>
## 1 Men Worsened 88.8 85
## 2 Men Stable 84.2 83
## 3 Men Improved 82.0 80
## 4 "Post-menopausal \n women" Worsened 72.2 70
## 5 "Post-menopausal \n women" Stable 71.0 69.5
## 6 "Post-menopausal \n women" Improved 69.2 68
## 7 "Pre-menopausal \n women" Worsened 72.1 69
## 8 "Pre-menopausal \n women" Stable 70.1 67
## 9 "Pre-menopausal \n women" Improved 69.8 65
pwed %>% group_by(group) %>% summarise(mean=mean(starting_weight),median=median(starting_weight))
## # A tibble: 3 x 3
## group mean median
## <ord> <dbl> <dbl>
## 1 Men 85.2 83
## 2 "Post-menopausal \n women" 71.1 70
## 3 "Pre-menopausal \n women" 70.7 67